1 Load packages and functions

1.1 Packages

library(ape)
library(phangorn)
library(caper)
library(tidyverse)
library(ggtree)
library(picante)
library(brms)
library(phangorn)
library(phytools)
library(treeio)
library(MASS)
library(car)
library(corrplot)
library(emmeans)
library(broom)
library(ggdist)
library(tidybayes)
library(raster)
library(sf)
library(exactextractr)
library(performance)

1.2 Functions

##Check ultrametric and/or fix
check_and_fix_ultrametric <- function(phy){
  
  if (!is.ultrametric(phy)){
    
    vv <- vcv.phylo(phy)
    dx <- diag(vv)
    mxx <- max(dx) - dx
    for (i in 1:length(mxx)){
      phy$edge.length[phy$edge[,2] == i] <- phy$edge.length[phy$edge[,2] == i] + mxx[i]
    }
    if (!is.ultrametric(phy)){
      stop("Ultrametric fix failed\n")
    }   
  }
  
  return(phy)
}
##Removed duplicated tips
remove_duplicate_tips<-function(tree){
for(spe in unique(tree$tip.label)){
  pos<-grep(paste("\\b",spe,"\\b",sep=""),tree$tip.label)
  if(length(pos)>1){
    rem<-pos[2:length(pos)]
    tree<-ape::drop.tip(phy=tree,tip=rem)
  } 
}
return(tree)
}

## Function for renaming tips
rename.tips.phylo <- function(tree, names) {
    tree$tip.label <- names
    return(tree)
}

#Stardarize variables

standard_varibles<-function(frame_data){
  for(nom in names(frame_data)){
    frame_data<-data.frame(frame_data)
    if(class(frame_data[,nom])!="numeric"){next}
    frame_data[,nom]<-scale(frame_data[,nom])[,1]
  }
  return(frame_data)
}


#Standard error function
se <- function(x) sd(x)/sqrt(length(x))

#Mean function 

meanfun <- function(data, i){
  d <- data[i, ]
  return(mean(d))   
}

#Variation coefficient

var_coef <- function(x, na.rm = FALSE) {
  sd(x, na.rm=na.rm) / mean(x, na.rm=na.rm)
}

2 Phylogeny setting

We are going to load the phylogenies of the two main families (Araneidae and Theridiidae) obtained in BEAST to removed duplicated tips and prune some outgroups to generate a single phylogeny for further analyses.

2.1 Araneidae phylogeny

#Load Araneidae tree
mcc_tree<-read.nexus("araneidae_new_final.tre")
#load fixed tip names
nam_tree<-read_csv("tip_names_araneidae.csv",col_names=T)
mcc_tree$tip.label<-nam_tree$corrected_name
###Remove repeated tips
tree_removedTips<-remove_duplicate_tips(mcc_tree)
##Remove tips to mix with Theridiidae tree
core<-extract.clade(phy=tree_removedTips,node=c(194), collapse.singles = TRUE,interactive = FALSE)
outgroups<- tree_removedTips$tip.label[which(tree_removedTips$tip.label %in% core$tip.label==FALSE)]
outgroups_araneidae<-outgroups
tree_removedTips<-drop.tip(tree_removedTips,outgroups)

2.2 Theridiidae phylogeny

#Load Theridiidae tree
tree_theridiidae<-read.nexus("total_Theridiidae_tree.tre")
#load fixed tip names
nam_tree<-read_delim("theridiidae_tips.txt",col_names=F)
tree_theridiidae$tip.label<-nam_tree$X2
#Remove repeated tips
tree_removed_theridiidae<-remove_duplicate_tips(tree_theridiidae)
#remove problematic tips
problematic_tips<-c("Chrysso_albipes","Chrysso_sp","Erigone_dentosa")
tree_removed_theridiidae<-drop.tip(tree_removed_theridiidae,c(problematic_tips))
#Identify the outgroup
plotTree(tree_removed_theridiidae) 

#nodelabels()
outgroups<-extract.clade(phy=tree_removed_theridiidae, node=304, root.edge = 0, collapse.singles = TRUE,interactive = FALSE) #Keep an eye on the node
outgroups_theridiidae<-outgroups

Now with the two phylogenies, we are going to join them fro further analyses

2.3 Join the phylogenies

calib<-makeChronosCalib(tree_removed_theridiidae, age.min = max(node.depth.edgelength(tree_removedTips)), age.max = max(node.depth.edgelength(tree_removedTips)))
tmp_t<-chronos(tree_removed_theridiidae, lambda = 1, model = "correlated", quiet = FALSE,
        calibration = calib,
        control = chronos.control())
## 
## Setting initial dates...
## Fitting in progress... get a first set of estimates
##          (Penalised) log-lik = -628.3153 
## Optimising rates... dates... -628.3153 
## 
## log-Lik = -628.3153 
## PHIIC = 2190.63
joint_trees_outgroups<-bind.tree(tree_removedTips,tmp_t, interactive = FALSE)
joint_trees<-bind.tree(tree_removedTips, ape::drop.tip(phy=tmp_t,outgroups$tip.label), interactive = FALSE)

# get scaled edge.length
joint_trees$edge.length <- joint_trees$edge.length / (max(joint_trees$edge.length))

#Remove duplicate tips
joint_trees<-remove_duplicate_tips(joint_trees)
#Check that the final tree is ultrametric
joint_trees<-check_and_fix_ultrametric(joint_trees)

3 Load data and data matching

#load the csv file
join_dataset<-read_csv("data_total.csv",col_names=T)
#replace spaces in species names
join_dataset$species<-gsub(pattern=" ", replacement="_",join_dataset$species)
#keep unique rows
join_dataset<-distinct(join_dataset,species,.keep_all = TRUE)
#Tranform presence in islands to a binary variable
join_dataset<-join_dataset %>% mutate(bin_island=ifelse(cat_island=="island"|cat_island=="island_continent",1,0))
#replace spaces in species names on the tree
join_tree<-multi2di(joint_trees)
join_tree$tip.label<-gsub(pattern=" ", replacement="_",join_tree$tip.label)
##Check if the table match the tree tips
#remove species that are not in the phylogeny
join_dataset<-join_dataset[join_dataset$species %in% join_tree$tip.label,]
##Add species with no information into the phylogeny, like XX_sp
for(spe in unique(join_tree$tip.label)){
  #Remove 
  if(spe %in% join_dataset$species==FALSE){
    print(spe)
    join_dataset<-join_dataset %>% add_row(species=spe)
  } 
}

#Let's modify the dataset to deal with colour polytipic species
join_dataset$polymorphism[which(join_dataset$polymorphism %in% c("polytipic","possible polytipic","pattern variable")==TRUE)]<-NA
join_dataset<-join_dataset %>% filter(!is.na(polymorphism))

dataset_all_species_phylogeny<-join_dataset

Now we have a single and a dataset that match each other.

4 Phylogeny visualization

Let’s see how is the presence of colour polymorphism present in the phylogeny

#Change tree name
to_plot_tree<-join_tree
#Find colour polymorphic lineages
otus<-join_dataset %>% filter(polymorphism=="yes") %>% pull(species)
to_plot_tree<-groupOTU(to_plot_tree, otus)
df_polymorphism<-data.frame(join_dataset$polymorphism)
#df_island<-data.frame(as.character(join_dataset$bin_island))
rownames(df_polymorphism)<-join_dataset$species
#plot tree with names
p<-ggtree(to_plot_tree, layout='circular') + geom_tiplab()
#pdf("total_tree_names.pdf", width=20,height=20)
plot(p)

#dev.off()
#plot tree colour polymorphism
p<-ggtree(to_plot_tree, layout='circular')
#pdf("total_tree_polymorphism.pdf", width=20,height=20)
gheatmap(p, df_polymorphism, offset=.001, width=.08,colnames = FALSE, colnames_offset_y = 1)+scale_fill_manual(values=c("#1ABEC6","#FF5B00"),name="Presence of\ncolour polymorphism")

#dev.off()

5 Filter species with poor geographical information

Arachnids is one of the groups with the poorest geographic information available in public databases.For instance, in our data ~51% of the species has less than 50 geographical records

species_points<-join_dataset %>% drop_na(n_points)
species_geo<-nrow(species_points[species_points$n_points<50,])/nrow(species_points)*100
print(paste0(species_geo,"%"," of the species with geographical information has less than 50 geographical records"))
## [1] "51.5625% of the species with geographical information has less than 50 geographical records"

To account for this, we decided to calculated the mean and its 95% confidence interval (CI) for the number of geographical records available for all the species. We excluded species from the subsequent analyses that fell outside the lower CI.

#Due to high vari
l_points<-na.omit(log(join_dataset$n_points))
##Let's calculate the 95% around the mean
library(boot)
data <- data.frame(xs = l_points)
bo <- boot(data[, "xs", drop = FALSE], statistic=meanfun, R=5000)
mean_ci<-boot.ci(bo, conf=0.95, type="bca")

ggplot(tibble(x=bo$t[,1]), aes(x=x)) +geom_density()+geom_segment(x=mean_ci$bca[4],xend=mean_ci$bca[5],y=0,yend=0,color="blue",size=2,lineend="round")

##Remove species with low number of records
datos_filtered<-join_dataset %>% filter(n_points>=exp(mean_ci$bca[4]))

#let's keep this filtered dataset for further analyses

data_filtered_phylogeny<-datos_filtered

6 Statistical models with the remaining species

all the predictors seems skewed or not uniform distributed, let’s modify some predictors that may affect the regression due to their non-normal distribution

datos_filtered$T_centroid_lat<-abs(datos_filtered$centroid_lat)   
datos_filtered$T_lat_range<-sqrt(abs(datos_filtered$lat_range))
datos_filtered$T_area_polygon<-sqrt(datos_filtered$area_polygon+1)
datos_filtered$T_lat_range_wsc<-abs(datos_filtered$lat_range_wsc)
datos_filtered$T_area_countries_wsc<-sqrt(datos_filtered$area_countries_wsc)
datos_filtered$T_points<-log(datos_filtered$n_points)
datos_filtered$T_bole<-log(datos_filtered$bole_female)

6.1 Analyses total samples

Remove species from islands that can affect calculations due to their geographic limit for dispersion

datos_filtered_total<-datos_filtered %>% filter(cat_island != "island") %>% data.frame()

Number of colour monomorphic and polymorphic species

table(datos_filtered_total$polymorphism)
## 
##  no yes 
##  59  33

Correlation plot between the variables

cor_matrix <- cor(na.omit(datos_filtered_total[,c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon","eco_reg_points","eco_reg_polygon","temp_zones_points","temp_zones_polygon")]))

colnames(cor_matrix)<c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon","eco_reg_points","eco_reg_polygon","temp_zones_points","temp_zones_polygon")
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
rownames(cor_matrix)<-c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon","eco_reg_points","eco_reg_polygon","temp_zones_points","temp_zones_polygon")
par(mfrow=c(1,1))
corrplot(cor_matrix, method = "number", type = "upper", order = "original", tl.cex=1, )

Standardize variable before the analysis, excluding the count variables

#datos_filtered_total[,c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon")]<-standard_varibles(datos_filtered_total[,c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon")]) %>% data.frame()

Now, let’s prepare the dataset and tree so they match, this is super important. Your phylogeny names need to match a column of data

Let’s run the models!

6.1.1 Association between colour polymorphism and number of records and latitude of the centroid

Evaluate if the colour monomorphic and colour polymorphic species differ in the number of records

set.seed(30011994)
brm_points <- brm(
 n_points ~ polymorphism,
  data = datos_filtered_total,
  family = negbinomial(),
  #prior = prior,
  #data2 = list(A = A),
  control = list(adapt_delta = 0.999, max_treedepth=10)
  ,chains = 4, cores = 4, iter = 20000
 ,file="points1.rda"
)

pairs(brm_points)

summary(brm_points)
##  Family: negbinomial 
##   Links: mu = log; shape = identity 
## Formula: n_points ~ polymorphism 
##    Data: datos_filtered_total (Number of observations: 92) 
##   Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
##          total post-warmup draws = 40000
## 
## Population-Level Effects: 
##                 Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept           7.14      0.17     6.82     7.49 1.00    25569    21635
## polymorphismyes     0.55      0.28     0.01     1.12 1.00    26066    22390
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape     0.60      0.07     0.47     0.76 1.00    27048    23466
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## Check if the predicted values fits the rawdata
#r2
r2_bayes(brm_points)
## # Bayesian R2 with Compatibility Interval
## 
##   Conditional R2: 0.040 (95% CI [8.475e-10, 0.160])
#
pp_check(brm_points)
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

##Preliminary plots
plot(conditional_effects(brm_points))

They do not have differences in the number of records

Evaluate if the colour monomorphic and colour polymorphic species differ in the latitude of the centroid

set.seed(30011994)
brm_centroid <- brm(
  T_centroid_lat ~ polymorphism,
  data = datos_filtered_total,
  family = skew_normal(),
  #prior = prior,
#  data2 = list(A = A),
  control = list(adapt_delta = 0.99999, max_treedepth=20)
  ,chains = 4, cores = 4, iter = 20000
,file="centroid1.rda"
)

pairs(brm_centroid)

summary(brm_centroid)
##  Family: skew_normal 
##   Links: mu = identity; sigma = identity; alpha = identity 
## Formula: T_centroid_lat ~ polymorphism 
##    Data: datos_filtered_total (Number of observations: 91) 
##   Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
##          total post-warmup draws = 40000
## 
## Population-Level Effects: 
##                 Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept          36.74      2.42    31.91    41.44 1.00    25713    22173
## polymorphismyes    -4.90      3.86   -12.52     2.66 1.00    26546    24053
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma    18.80      1.53    16.10    22.12 1.00    22032    20805
## alpha    -1.73      1.78    -5.22     1.39 1.00    18590    22096
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(brm_centroid)
## # Bayesian R2 with Compatibility Interval
## 
##   Conditional R2: 0.017 (95% CI [2.460e-11, 0.081])
## Check if the predicted values fits the rawdata
pp_check(brm_centroid)
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

##Preliminary plots
plot(conditional_effects(brm_centroid))

They do not have differences in the latitude of the centroid

Evaluate if the colour monomorphic and colour polymorphic species differ in the body length

set.seed(30011994)
brm_bole <- brm(
  T_bole ~ polymorphism,
  data = datos_filtered_total,
  family = gaussian(),
  #prior = prior,
  #data2 = list(A = A),
  control = list(adapt_delta = 0.999, max_treedepth=10)
  ,chains = 4, cores = 4, iter = 20000
  ,file="bole1.rda"
)

pairs(brm_bole)

summary(brm_bole)
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: T_bole ~ polymorphism 
##    Data: datos_filtered_total (Number of observations: 91) 
##   Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
##          total post-warmup draws = 40000
## 
## Population-Level Effects: 
##                 Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept           2.12      0.08     1.97     2.28 1.00    29096    22723
## polymorphismyes     0.16      0.14    -0.11     0.43 1.00    27882    22512
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     0.62      0.05     0.54     0.72 1.00    28708    23708
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## Check if the predicted values fits the rawdata
#r2
r2_bayes(brm_bole)
## # Bayesian R2 with Compatibility Interval
## 
##   Conditional R2: 0.015 (95% CI [2.925e-12, 0.082])
pp_check(brm_bole)
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

##Preliminary plots
plot(conditional_effects(brm_bole))

They do not have differences in body length

let’s Evaluate the association fo the predictors

lm_lat_range <- lm(T_lat_range ~ polymorphism+T_bole+T_centroid_lat, data=datos_filtered_total)

check_collinearity(lm_lat_range)

The predictors are not collinear, we can use all of them in the models

6.1.2 Latitudinal range

set.seed(30011994)
brm_latrange_1 <- brm(
 T_lat_range ~ polymorphism+T_bole+T_centroid_lat + (1| gr(species, cov = A)) ,
  data = datos_filtered_total,
  family = gaussian(),
  #prior = prior,
  data2 = list(A = A),
  control = list(adapt_delta = 0.9999, max_treedepth=11)
  ,chains = 4, cores = 4, iter = 20000
 ,file="latrange1.rda"
)

pairs(brm_latrange_1)

summary(brm_latrange_1)
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: T_lat_range ~ polymorphism + T_bole + T_centroid_lat + (1 | gr(species, cov = A)) 
##    Data: datos_filtered_total (Number of observations: 90) 
##   Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
##          total post-warmup draws = 40000
## 
## Group-Level Effects: 
## ~species (Number of levels: 90) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.88      0.38     0.12     1.61 1.00     2319     3305
## 
## Population-Level Effects: 
##                 Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept           5.78      0.84     4.12     7.43 1.00    28876    28641
## polymorphismyes     0.67      0.31     0.06     1.27 1.00    19696    25067
## T_bole              0.28      0.29    -0.29     0.85 1.00    30353    29554
## T_centroid_lat     -0.03      0.01    -0.05    -0.02 1.00    25875    29203
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     1.02      0.19     0.60     1.36 1.00     2450     2632
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## Check if the predicted values fits the rawdata
#r2
r2_bayes(brm_latrange_1)
## # Bayesian R2 with Compatibility Interval
## 
##   Conditional R2: 0.475 (95% CI [0.177, 0.802])
##      Marginal R2: 0.299 (95% CI [0.145, 0.427])
pp_check(brm_latrange_1)
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

##Preliminary plots
plot(conditional_effects(brm_latrange_1))

6.1.3 Range size calculated with the area of the Convex polygon

area_polygon_1 <- brm(
 T_area_polygon ~ polymorphism+T_bole+T_centroid_lat + (1| gr(species, cov = A)),
  data = datos_filtered_total,
  family = skew_normal(),
  #prior = prior,
  data2 = list(A = A),
  control = list(adapt_delta = 0.999, max_treedepth=10)
  ,chains = 4, cores = 4, iter = 20000
 ,file="areapolygon1.rda"
)

pairs(area_polygon_1)

summary(area_polygon_1)
##  Family: skew_normal 
##   Links: mu = identity; sigma = identity; alpha = identity 
## Formula: T_area_polygon ~ polymorphism + T_bole + T_centroid_lat + (1 | gr(species, cov = A)) 
##    Data: datos_filtered_total (Number of observations: 89) 
##   Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
##          total post-warmup draws = 40000
## 
## Group-Level Effects: 
## ~species (Number of levels: 89) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)   334.81    208.47    17.94   783.59 1.00     8654    16747
## 
## Population-Level Effects: 
##                 Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept        1437.14    669.67   125.38  2773.04 1.00    45615    30231
## polymorphismyes   371.37    248.65  -114.24   866.03 1.00    47901    29163
## T_bole            296.21    230.37  -158.65   748.90 1.00    53549    32232
## T_centroid_lat      8.04      8.09    -8.05    23.77 1.00    46338    31192
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma  1140.77    105.83   944.89  1359.32 1.00    24904    24081
## alpha     3.57      1.60     1.38     7.67 1.00    21105    16581
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## Check if the predicted values fits the rawdata
#r2
r2_bayes(area_polygon_1)
## # Bayesian R2 with Compatibility Interval
## 
##   Conditional R2: 0.121 (95% CI [0.010, 0.276])
##      Marginal R2: 0.068 (95% CI [0.002, 0.162])
pp_check(area_polygon_1)
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

##Preliminary plots
plot(conditional_effects(area_polygon_1))

6.1.4 Ecological regions

To indirectly explore the difference in niche width between colour monomorphic and polymorphic species, we measured the number of ecological regions occupy by each species using the geographical records and polygons. the ecological regions were obtained here

BRMS model exploring differences in the ecological regions occupy by monomorphic and polymorphic species based on geographical records

set.seed(30011994)
eco_reg_points_1 <- brm(
 eco_reg_points ~  polymorphism+T_bole+T_centroid_lat + (1| gr(species, cov = A)) ,
  data = datos_filtered_total,
  family = poisson(),
  #prior = prior,
  data2 = list(A = A),
  control = list(adapt_delta = 0.999, max_treedepth=20)
  ,chains = 4, cores = 4, iter = 20000
 ,file="ecoregpoints1.rda"
)

pairs(eco_reg_points_1)

summary(eco_reg_points_1)
##  Family: poisson 
##   Links: mu = log 
## Formula: eco_reg_points ~ polymorphism + T_bole + T_centroid_lat + (1 | gr(species, cov = A)) 
##    Data: datos_filtered_total (Number of observations: 90) 
##   Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
##          total post-warmup draws = 40000
## 
## Group-Level Effects: 
## ~species (Number of levels: 90) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.26      0.12     1.05     1.51 1.00     7774    14771
## 
## Population-Level Effects: 
##                 Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept           1.59      0.71     0.18     2.98 1.00     6148    11039
## polymorphismyes     0.42      0.21     0.01     0.84 1.00     6047    11417
## T_bole              0.54      0.23     0.10     0.99 1.00     6184    11667
## T_centroid_lat      0.01      0.01    -0.00     0.02 1.00     6466    11658
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## Check if the predicted values fits the rawdata
#r2
r2_bayes(eco_reg_points_1)
## # Bayesian R2 with Compatibility Interval
## 
##   Conditional R2: 0.964 (95% CI [0.948, 0.977])
##      Marginal R2: 0.151 (95% CI [0.002, 0.434])
pp_check(eco_reg_points_1)
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

##Preliminary plots
plot(conditional_effects(eco_reg_points_1))

BRMS model exploring differences in the ecological regions occupy by monomorphic and polymorphic species based on polygon

eco_reg_polygon_1 <- brm(
 eco_reg_polygon~ polymorphism+T_bole+T_centroid_lat + (1| gr(species, cov = A)) ,
  data = datos_filtered_total,
  family = poisson(),
  #prior = prior,
  data2 = list(A = A),
  control = list(adapt_delta = 0.999, max_treedepth=20)
  ,chains = 4, cores = 4, iter = 20000
 ,file="ecoregpol1.rda"
)

pairs(eco_reg_polygon_1)

summary(eco_reg_polygon_1)
##  Family: poisson 
##   Links: mu = log 
## Formula: eco_reg_polygon ~ polymorphism + T_bole + T_centroid_lat + (1 | gr(species, cov = A)) 
##    Data: datos_filtered_total (Number of observations: 89) 
##   Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
##          total post-warmup draws = 40000
## 
## Group-Level Effects: 
## ~species (Number of levels: 89) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.33      0.12     1.13     1.58 1.00     5164     9918
## 
## Population-Level Effects: 
##                 Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept           2.89      0.74     1.46     4.35 1.00     4746     8849
## polymorphismyes     0.44      0.22     0.01     0.86 1.00     4689     8391
## T_bole              0.57      0.23     0.11     1.03 1.00     4978     9099
## T_centroid_lat     -0.01      0.01    -0.02     0.00 1.00     4667     8828
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(eco_reg_polygon_1)
## # Bayesian R2 with Compatibility Interval
## 
##   Conditional R2: 0.985 (95% CI [0.977, 0.991])
##      Marginal R2: 0.223 (95% CI [0.017, 0.508])
## Check if the predicted values fits the rawdata
pp_check(eco_reg_polygon_1)
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

##Preliminary plots
plot(conditional_effects(eco_reg_polygon_1))

6.1.5 Climatic zones

Addtionally, we also explored if colour monomorphic and polymorphic species differ in the number of climatic zones they occupy. We measured the number of climatic zones for each species using the geographical records and polygons. the Köppen-Geiger climate classification zones were obtained here

temp_zones_points_1 <- brm(
 temp_zones_points~ polymorphism+T_bole+T_centroid_lat + (1| gr(species, cov = A)) ,
  data = datos_filtered_total,
  family = negbinomial(),
  #prior = prior,
  data2 = list(A = A),
  control = list(adapt_delta = 0.999, max_treedepth=20)
  ,chains = 4, cores = 4, iter = 20000
 ,file="climpoint1.rda"
)

pairs(temp_zones_points_1)

summary(temp_zones_points_1)
##  Family: negbinomial 
##   Links: mu = log; shape = identity 
## Formula: temp_zones_points ~ polymorphism + T_bole + T_centroid_lat + (1 | gr(species, cov = A)) 
##    Data: datos_filtered_total (Number of observations: 90) 
##   Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
##          total post-warmup draws = 40000
## 
## Group-Level Effects: 
## ~species (Number of levels: 90) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.12      0.08     0.01     0.30 1.00     8401    15556
## 
## Population-Level Effects: 
##                 Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept           1.16      0.24     0.67     1.63 1.00    36082    26672
## polymorphismyes     0.18      0.09    -0.01     0.36 1.00    42445    30413
## T_bole              0.32      0.08     0.15     0.48 1.00    40149    28908
## T_centroid_lat      0.01      0.00     0.00     0.01 1.00    47812    30646
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape    52.83     50.14    11.90   195.60 1.00    29584    27642
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(temp_zones_points_1)
## # Bayesian R2 with Compatibility Interval
## 
##   Conditional R2: 0.277 (95% CI [0.113, 0.463])
##      Marginal R2: 0.233 (95% CI [0.081, 0.392])
## Check if the predicted values fits the rawdata
pp_check(temp_zones_points_1)
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

##Preliminary plots
plot(conditional_effects(temp_zones_points_1))

temp_zones_polygon_1 <- brm(
 temp_zones_polygon~polymorphism+T_bole+T_centroid_lat+ (1| gr(species, cov = A)) ,
  data = datos_filtered_total,
  family = negbinomial(),
  #prior = prior,
  data2 = list(A = A),
  control = list(adapt_delta = 0.999, max_treedepth=20)
  ,chains = 4, cores = 4, iter = 20000
 ,file="climpol1.rda"
)

pairs(temp_zones_polygon_1)

summary(temp_zones_polygon_1)
##  Family: negbinomial 
##   Links: mu = log; shape = identity 
## Formula: temp_zones_polygon ~ polymorphism + T_bole + T_centroid_lat + (1 | gr(species, cov = A)) 
##    Data: datos_filtered_total (Number of observations: 89) 
##   Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
##          total post-warmup draws = 40000
## 
## Group-Level Effects: 
## ~species (Number of levels: 89) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.09      0.07     0.00     0.26 1.00    11792    18059
## 
## Population-Level Effects: 
##                 Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept           2.82      0.25     2.33     3.32 1.00    41709    30165
## polymorphismyes    -0.02      0.10    -0.22     0.17 1.00    49572    31171
## T_bole              0.04      0.09    -0.13     0.22 1.00    40894    31970
## T_centroid_lat     -0.01      0.00    -0.02    -0.00 1.00    49852    31213
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape    10.12      3.72     5.45    18.96 1.00    33367    22063
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(temp_zones_polygon_1)
## # Bayesian R2 with Compatibility Interval
## 
##   Conditional R2: 0.251 (95% CI [0.100, 0.406])
##      Marginal R2: 0.208 (95% CI [0.066, 0.356])
## Check if the predicted values fits the rawdata
pp_check(temp_zones_polygon_1)
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

##Preliminary plots
plot(conditional_effects(temp_zones_polygon_1))

all_models<-NULL

for(mod in c("brm_latrange_1","area_polygon_1","eco_reg_points_1","eco_reg_polygon_1","temp_zones_polygon_1","temp_zones_points_1")){
baye_mode = get(mod)
  bayes_results<-baye_mode %>% 
  spread_draws(b_polymorphismyes)
  bayes_results<-tibble(b_polymorphismyes=bayes_results$b_polymorphismyes,model=mod)
  all_models<-bind_rows(all_models, bayes_results)
} 

all_models %>% ggplot(aes(y = model, x = b_polymorphismyes)) +
  stat_halfeye()+
  theme_classic()+
  geom_vline(xintercept = 0, linetype = "dashed",col="black",size=1)+
  labs(x="Estimate",y="Models")

Plot of the model

paleta1<-c("#1ABEC6","#FF5B00")

dataset_plot<-datos_filtered_total %>% drop_na(T_lat_range|polymorphism|T_bole|T_centroid_lat)

dataset_plot$predict<-predict(brm_latrange_1,type="response")[,"Estimate"]

dataset_plot %>% ggplot(aes(x=polymorphism,y=predict,fill=polymorphism))+geom_point(aes(x=polymorphism,y=T_lat_range),shape = 21,size=3, position = position_jitterdodge(),alpha=0.5)+geom_violin(aes(x=polymorphism,y=T_lat_range),alpha=0.1, position = position_dodge(width = .75),size=1)+
  stat_summary(fun = mean,aes(color = polymorphism,group=polymorphism),fun.min = function(x) mean(x) - (2*se(x)),fun.max = function(x) mean(x)+(2*se(x)),geom = "pointrange",shape=22,size=1.5,col="black")+scale_fill_manual(values=paleta1)+scale_colour_manual(values=paleta1)+theme_classic()+labs(x="Colour polymorphism",y="Latitudinal range")

6.2 Predictive confidence interval

To eliminate any false association caused by sampling bias, we repeated the above analyses with a reduced dataset. The subset was created by calculating a linear regression between the number of geographical records and the geographical area of the regions described in the WSC (a positive relationship), and then discarding species outside the lower boundary of the 50% predictive confidence interval (Quantile 0.75 and Quantile 0.25). In this way we only kept species with a small number of records when their WSC calculated range was calculated as very small (Predictive interval subset; supplementary figure 2). This approach is different from using a threshold for the number of points because it acknowledges that some species will have fewer records if their range is very restricted.

Let’s generate the dataset

no_island<-datos_filtered %>% filter(cat_island!="island") %>% drop_na(n_points) %>% drop_na(area_polygon) %>%drop_na(polymorphism)

no_island<-na.omit(no_island)

#no_island[,c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon")]<-standard_varibles(no_island[,c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon")]) %>% data.frame()

lm_points<-lm(T_points~T_area_countries_wsc,data=no_island)
summary(lm_points)
## 
## Call:
## lm(formula = T_points ~ T_area_countries_wsc, data = no_island)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.6301 -0.8926 -0.0480  0.8971  2.6342 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          3.9696268  0.5204457   7.627 5.67e-11 ***
## T_area_countries_wsc 0.0004916  0.0000971   5.063 2.80e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.311 on 76 degrees of freedom
## Multiple R-squared:  0.2522, Adjusted R-squared:  0.2424 
## F-statistic: 25.64 on 1 and 76 DF,  p-value: 2.799e-06
no_island<-cbind(no_island,predict(lm_points,interval="prediction",level=0.50))## this plots the q10% and q90%
## Warning in predict.lm(lm_points, interval = "prediction", level = 0.5): predictions on current data refer to _future_ responses
ggplot(no_island,aes(T_area_countries_wsc,T_points))+geom_point(size=3,aes(col=polymorphism))+ geom_smooth(method = "lm",level=0.99)+geom_line(aes(y=upr),col="red")+geom_line(aes(y=lwr),col="red")+theme_bw()
## `geom_smooth()` using formula = 'y ~ x'

##subset based on the prediction intervals
pi_subset<-no_island[!no_island$T_points<no_island$lwr,]

Number of colour monomorphic and polymorphic species after filtering

table(pi_subset$polymorphism)
## 
##  no yes 
##  35  23

Correlation plot between the variables

cor_matrix <- cor(na.omit(pi_subset[,c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon","eco_reg_points","eco_reg_polygon","temp_zones_points","temp_zones_polygon")]))

colnames(cor_matrix)<c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon","eco_reg_points","eco_reg_polygon","temp_zones_points","temp_zones_polygon")
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
rownames(cor_matrix)<-c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon","eco_reg_points","eco_reg_polygon","temp_zones_points","temp_zones_polygon")
par(mfrow=c(1,1))
corrplot(cor_matrix, method = "number", type = "upper", order = "original", tl.cex=1, )

Standardize variable before the analysis, excluding the count variables

#pi_subset[,c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon")]<-standard_varibles(pi_subset[,c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon")]) %>% data.frame()

Now, let’s prepare the dataset and tree so they match, this is super important. Your phylogeny names need to match a column of data

Let’s run the models!

6.2.1 Association between colour polymorphism and number of records and latitude of the centroid

Evaluate if the colour monomorphic and colour polymorphic species differ in the number of records

set.seed(30011994)
brm_points <- brm(
 n_points ~ polymorphism,
  data = pi_subset,
  family = negbinomial(),
  #prior = prior,
  #data2 = list(A = A),
  control = list(adapt_delta = 0.999, max_treedepth=10)
  ,chains = 4, cores = 4, iter = 20000
 ,file="points2.rda"
)

pairs(brm_points)

summary(brm_points)
##  Family: negbinomial 
##   Links: mu = log; shape = identity 
## Formula: n_points ~ polymorphism 
##    Data: pi_subset (Number of observations: 58) 
##   Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
##          total post-warmup draws = 40000
## 
## Population-Level Effects: 
##                 Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept           7.50      0.17     7.17     7.86 1.00    27591    20766
## polymorphismyes     0.53      0.28    -0.00     1.09 1.00    26398    21795
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape     0.98      0.16     0.69     1.33 1.00    28316    23982
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(brm_points)
## # Bayesian R2 with Compatibility Interval
## 
##   Conditional R2: 0.065 (95% CI [9.964e-11, 0.229])
## Check if the predicted values fits the rawdata
pp_check(brm_points)
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

##Preliminary plots
plot(conditional_effects(brm_points))

They do not have differences in the number of records

Evaluate if the colour monomorphic and colour polymorphic species differ in the latitude of the centroid

set.seed(30011994)
brm_centroid <- brm(
  T_centroid_lat ~ polymorphism,
  data = pi_subset,
  family = skew_normal(),
  #prior = prior,
  #data2 = list(A = A),
  control = list(adapt_delta = 0.999, max_treedepth=10)
  ,chains = 4, cores = 4, iter = 20000
  ,file="centroids2.rda"
)

pairs(brm_centroid)

summary(brm_centroid)
##  Family: skew_normal 
##   Links: mu = identity; sigma = identity; alpha = identity 
## Formula: T_centroid_lat ~ polymorphism 
##    Data: pi_subset (Number of observations: 58) 
##   Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
##          total post-warmup draws = 40000
## 
## Population-Level Effects: 
##                 Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept          41.21      2.87    35.50    46.75 1.00    22737    21496
## polymorphismyes    -4.21      4.17   -12.37     4.10 1.00    25556    23960
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma    18.02      1.81    14.87    21.91 1.00    20726    21331
## alpha    -3.30      1.44    -6.18    -0.02 1.00    21378    14702
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## Check if the predicted values fits the rawdata
pp_check(brm_centroid)
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

##Preliminary plots
plot(conditional_effects(brm_centroid))

They do not have differences in the latitude of the centroid

Evaluate if the colour monomorphic and colour polymorphic species differ in the body length

set.seed(30011994)
brm_bole <- brm(
  T_bole ~ polymorphism,
  data = pi_subset,
  family = gaussian(),
  #prior = prior,
  #data2 = list(A = A),
  control = list(adapt_delta = 0.999, max_treedepth=10)
  ,chains = 4, cores = 4, iter = 20000
  ,file="bole2.rda"
)

pairs(brm_bole)

summary(brm_bole)
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: T_bole ~ polymorphism 
##    Data: pi_subset (Number of observations: 58) 
##   Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
##          total post-warmup draws = 40000
## 
## Population-Level Effects: 
##                 Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept           2.11      0.12     1.89     2.34 1.00    26598    22972
## polymorphismyes     0.17      0.18    -0.19     0.53 1.00    26092    21577
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     0.68      0.07     0.57     0.83 1.00    24965    22613
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(brm_bole)
## # Bayesian R2 with Compatibility Interval
## 
##   Conditional R2: 0.018 (95% CI [7.630e-11, 0.105])
## Check if the predicted values fits the rawdata
pp_check(brm_bole)
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

##Preliminary plots
plot(conditional_effects(brm_bole))

They do not have differences in body length

let’s Evaluate the association fo the predictors

lm_lat_range <- lm(T_lat_range ~ polymorphism+T_bole+T_centroid_lat, data=pi_subset)

check_collinearity(lm_lat_range)

The predictors are not collinear, we can use all of them in the models

6.2.2 Latitudinal range

set.seed(30011994)
brm_latrange_2 <- brm(
 T_lat_range ~ polymorphism+T_bole+T_centroid_lat + (1| gr(species, cov = A)) ,
  data = pi_subset,
  family = student(),
  #prior = prior,
  data2 = list(A = A),
  control = list(adapt_delta = 0.999, max_treedepth=10)
  ,chains = 4, cores = 4, iter = 20000
 ,file="latrange2.rda"
)

pairs(brm_latrange_2)

summary(brm_latrange_2)
## Warning: There were 2 divergent transitions after warmup. Increasing
## adapt_delta above 0.999 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
##  Family: student 
##   Links: mu = identity; sigma = identity; nu = identity 
## Formula: T_lat_range ~ polymorphism + T_bole + T_centroid_lat + (1 | gr(species, cov = A)) 
##    Data: pi_subset (Number of observations: 58) 
##   Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
##          total post-warmup draws = 40000
## 
## Group-Level Effects: 
## ~species (Number of levels: 58) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.51      0.38     0.02     1.35 1.00     2282     2244
## 
## Population-Level Effects: 
##                 Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept           6.77      0.88     5.01     8.50 1.00    21853    21648
## polymorphismyes     0.44      0.30    -0.16     1.03 1.00    32376    27705
## T_bole              0.10      0.27    -0.43     0.62 1.00    28391    28281
## T_centroid_lat     -0.04      0.01    -0.06    -0.02 1.00    16499    17343
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     0.88      0.18     0.45     1.20 1.00     2843     2146
## nu       20.30     13.87     3.11    55.34 1.00    29240    14341
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(brm_latrange_2)
## # Bayesian R2 with Compatibility Interval
## 
##   Conditional R2: 0.445 (95% CI [0.190, 0.847])
##      Marginal R2: 0.371 (95% CI [0.172, 0.527])
## Check if the predicted values fits the rawdata
pp_check(brm_latrange_2)
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

##Preliminary plots
plot(conditional_effects(brm_latrange_2))

6.2.3 Range size calculated with the area of the Convex polygon

area_polygon_2 <- brm(
 T_area_polygon ~ polymorphism+T_bole+T_centroid_lat + (1| gr(species, cov = A)),
  data = pi_subset,
  family = skew_normal(),
  #prior = prior,
  data2 = list(A = A),
  control = list(adapt_delta = 0.999, max_treedepth=10)
  ,chains = 4, cores = 4, iter = 20000
 ,file="areapolygon2.rda"
)

pairs(area_polygon_2)

summary(area_polygon_2)
## Warning: There were 1 divergent transitions after warmup. Increasing
## adapt_delta above 0.999 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
##  Family: skew_normal 
##   Links: mu = identity; sigma = identity; alpha = identity 
## Formula: T_area_polygon ~ polymorphism + T_bole + T_centroid_lat + (1 | gr(species, cov = A)) 
##    Data: pi_subset (Number of observations: 58) 
##   Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
##          total post-warmup draws = 40000
## 
## Group-Level Effects: 
## ~species (Number of levels: 58) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)   485.26    291.49    26.87  1102.46 1.00     4905     7788
## 
## Population-Level Effects: 
##                 Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept        2251.13    861.99   559.90  3967.39 1.00    28479    27991
## polymorphismyes   223.15    318.85  -389.81   868.10 1.00    25841    26459
## T_bole            216.46    265.29  -302.04   735.11 1.00    26134    27648
## T_centroid_lat     -0.72     11.27   -22.87    21.65 1.00    30983    29762
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma  1051.28    151.46   747.52  1348.52 1.00     6977     7184
## alpha     3.53      2.20    -0.57     8.63 1.00    15454    19393
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(area_polygon_2)
## # Bayesian R2 with Compatibility Interval
## 
##   Conditional R2: 0.187 (95% CI [0.006, 0.488])
##      Marginal R2: 0.066 (95% CI [5.615e-04, 0.197])
## Check if the predicted values fits the rawdata
pp_check(area_polygon_2)
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

##Preliminary plots
plot(conditional_effects(area_polygon_2))

6.2.4 Ecological regions

To indirectly explore the difference in niche width between colour monomorphic and polymorphic species, we measured the number of ecological regions occupy by each species using the geographical records and polygons. the ecological regions were obtained here

BRMS model exploring differences in the ecological regions occupy by monomorphic and polymorphic species based on geographical records

set.seed(30011994)
eco_reg_points_2 <- brm(
 eco_reg_points ~  polymorphism+T_bole+T_centroid_lat + (1| gr(species, cov = A)) ,
  data = pi_subset,
  family = poisson(),
  #prior = prior,
  data2 = list(A = A),
  control = list(adapt_delta = 0.999, max_treedepth=20)
  ,chains = 4, cores = 4, iter = 20000
 ,file="ecoreg_points2.rda"
)

pairs(eco_reg_points_2)

summary(eco_reg_points_2)
##  Family: poisson 
##   Links: mu = log 
## Formula: eco_reg_points ~ polymorphism + T_bole + T_centroid_lat + (1 | gr(species, cov = A)) 
##    Data: pi_subset (Number of observations: 58) 
##   Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
##          total post-warmup draws = 40000
## 
## Group-Level Effects: 
## ~species (Number of levels: 58) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.89      0.10     0.71     1.12 1.00     7911    13420
## 
## Population-Level Effects: 
##                 Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept           2.46      0.61     1.25     3.64 1.00     8006    13396
## polymorphismyes     0.31      0.18    -0.05     0.67 1.00     8540    13143
## T_bole              0.41      0.19     0.04     0.79 1.00     9042    14262
## T_centroid_lat     -0.00      0.01    -0.01     0.01 1.00     8350    13036
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(eco_reg_points_2)
## # Bayesian R2 with Compatibility Interval
## 
##   Conditional R2: 0.963 (95% CI [0.943, 0.979])
##      Marginal R2: 0.146 (95% CI [0.002, 0.407])
## Check if the predicted values fits the rawdata
pp_check(eco_reg_points_2)
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

##Preliminary plots
plot(conditional_effects(eco_reg_points_2))

BRMS model exploring differences in the ecological regions occupy by monomorphic and polymorphic species based on polygon

eco_reg_polygon_2 <- brm(
 eco_reg_polygon~ polymorphism+T_bole+T_centroid_lat + (1| gr(species, cov = A)) ,
  data = pi_subset,
  family = poisson(),
  #prior = prior,
  data2 = list(A = A),
  control = list(adapt_delta = 0.999, max_treedepth=20)
  ,chains = 4, cores = 4, iter = 20000
 ,file="ecoreg_pol2.rda"
)

pairs(eco_reg_polygon_2)

summary(eco_reg_polygon_2)
##  Family: poisson 
##   Links: mu = log 
## Formula: eco_reg_polygon ~ polymorphism + T_bole + T_centroid_lat + (1 | gr(species, cov = A)) 
##    Data: pi_subset (Number of observations: 58) 
##   Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
##          total post-warmup draws = 40000
## 
## Group-Level Effects: 
## ~species (Number of levels: 58) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.03      0.11     0.84     1.28 1.00     6840    14036
## 
## Population-Level Effects: 
##                 Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept           3.45      0.68     2.12     4.77 1.00     6337    12509
## polymorphismyes     0.31      0.20    -0.08     0.71 1.00     6977    12265
## T_bole              0.31      0.21    -0.10     0.73 1.00     7053    11859
## T_centroid_lat     -0.01      0.01    -0.02     0.01 1.00     6176    11849
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(eco_reg_polygon_2)
## # Bayesian R2 with Compatibility Interval
## 
##   Conditional R2: 0.980 (95% CI [0.969, 0.988])
##      Marginal R2: 0.133 (95% CI [6.729e-04, 0.418])
## Check if the predicted values fits the rawdata
pp_check(eco_reg_polygon_2)
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

##Preliminary plots
plot(conditional_effects(eco_reg_polygon_2))

6.2.5 Climatic zones

Addtionally, we also explored if colour monomorphic and polymorphic species differ in the number of climatic zones they occupy. We measured the number of climatic zones for each species using the geographical records and polygons. the Köppen-Geiger climate classification zones were obtained here

temp_zones_points_2 <- brm(
 temp_zones_points~ polymorphism+T_bole+T_centroid_lat + (1| gr(species, cov = A)) ,
  data = pi_subset,
  family = negbinomial(),
  #prior = prior,
  data2 = list(A = A),
  control = list(adapt_delta = 0.999, max_treedepth=20)
  ,chains = 4, cores = 4, iter = 20000
 ,file="clim_points2.rda"
)

pairs(temp_zones_points_2)

summary(temp_zones_points_2)
##  Family: negbinomial 
##   Links: mu = log; shape = identity 
## Formula: temp_zones_points ~ polymorphism + T_bole + T_centroid_lat + (1 | gr(species, cov = A)) 
##    Data: pi_subset (Number of observations: 58) 
##   Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
##          total post-warmup draws = 40000
## 
## Group-Level Effects: 
## ~species (Number of levels: 58) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.08      0.06     0.00     0.23 1.00    15567    18304
## 
## Population-Level Effects: 
##                 Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept           1.57      0.26     1.06     2.08 1.00    39201    28484
## polymorphismyes     0.11      0.10    -0.08     0.30 1.00    50836    31078
## T_bole              0.24      0.08     0.07     0.40 1.00    41984    29099
## T_centroid_lat      0.00      0.00    -0.00     0.01 1.00    45141    31680
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape    99.60     75.08    21.38   299.95 1.00    53987    32100
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(temp_zones_points_2)
## # Bayesian R2 with Compatibility Interval
## 
##   Conditional R2: 0.293 (95% CI [0.096, 0.471])
##      Marginal R2: 0.250 (95% CI [0.059, 0.422])
## Check if the predicted values fits the rawdata
pp_check(temp_zones_points_2)
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

##Preliminary plots
plot(conditional_effects(temp_zones_points_2))

temp_zones_polygon_2 <- brm(
 temp_zones_polygon~polymorphism+T_bole+T_centroid_lat+ (1| gr(species, cov = A)) ,
  data = pi_subset,
  family = negbinomial(),
  #prior = prior,
  data2 = list(A = A),
  control = list(adapt_delta = 0.999, max_treedepth=20)
  ,chains = 4, cores = 4, iter = 20000
 ,file="clim_pol2.rda"
)

pairs(temp_zones_polygon_2)

summary(temp_zones_polygon_2)
##  Family: negbinomial 
##   Links: mu = log; shape = identity 
## Formula: temp_zones_polygon ~ polymorphism + T_bole + T_centroid_lat + (1 | gr(species, cov = A)) 
##    Data: pi_subset (Number of observations: 58) 
##   Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
##          total post-warmup draws = 40000
## 
## Group-Level Effects: 
## ~species (Number of levels: 58) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.13      0.09     0.01     0.35 1.00     9703    13603
## 
## Population-Level Effects: 
##                 Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept           3.20      0.33     2.57     3.86 1.00    39482    28967
## polymorphismyes    -0.09      0.12    -0.33     0.16 1.00    49267    30590
## T_bole             -0.01      0.11    -0.23     0.19 1.00    36329    28960
## T_centroid_lat     -0.01      0.00    -0.02    -0.01 1.00    42875    30475
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape    11.37      8.71     4.67    28.83 1.00    20958    13902
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(temp_zones_polygon_2)
## # Bayesian R2 with Compatibility Interval
## 
##   Conditional R2: 0.399 (95% CI [0.183, 0.561])
##      Marginal R2: 0.335 (95% CI [0.133, 0.519])
## Check if the predicted values fits the rawdata
pp_check(temp_zones_polygon_2)
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

##Preliminary plots
plot(conditional_effects(temp_zones_polygon_2))

all_models<-NULL

for(mod in c("brm_latrange_2","area_polygon_2","eco_reg_points_2","eco_reg_polygon_2","temp_zones_polygon_2","temp_zones_points_2")){
baye_mode = get(mod)
  bayes_results<-baye_mode %>% 
  spread_draws(b_polymorphismyes)
  bayes_results<-tibble(b_polymorphismyes=bayes_results$b_polymorphismyes,model=mod)
  all_models<-bind_rows(all_models, bayes_results)
} 

all_models %>% ggplot(aes(y = model, x = b_polymorphismyes)) +
  stat_halfeye()+
  theme_classic()+
  geom_vline(xintercept = 0, linetype = "dashed",col="black",size=1)+
  labs(x="Estimate",y="Models")

Plot of the model

paleta1<-c("#1ABEC6","#FF5B00")

dataset_plot<-pi_subset %>% drop_na(T_lat_range|polymorphism|T_bole|T_centroid_lat)

dataset_plot$predict<-predict(brm_latrange_2,type="response")[,"Estimate"]

dataset_plot %>% ggplot(aes(x=polymorphism,y=predict,fill=polymorphism))+geom_point(aes(x=polymorphism,y=T_lat_range),shape = 21,size=3, position = position_jitterdodge(),alpha=0.5)+geom_violin(aes(x=polymorphism,y=T_lat_range),alpha=0.1, position = position_dodge(width = .75),size=1)+
  stat_summary(fun = mean,aes(color = polymorphism,group=polymorphism),fun.min = function(x) mean(x) - (2*se(x)),fun.max = function(x) mean(x)+(2*se(x)),geom = "pointrange",shape=22,size=1.5,col="black")+scale_fill_manual(values=paleta1)+scale_colour_manual(values=paleta1)+theme_classic()+labs(x="Colour polymorphism",y="Latitudinal range")

7 models without considering the phylogeny

We observed that the models with continous variables have values close to 0 and that the climatic zones models have random effects low variaces close to 0. This means that the phylogentic relationships of the individuals are not having a major effect on these models.

hyp <- "sd_species__Intercept^2 / (sd_species__Intercept^2 + sigma^2) = 0"
lat_range_sig <- hypothesis(brm_latrange_1, hyp, class = NULL)
area_sig<-hypothesis(area_polygon_1, hyp, class = NULL)

ggplot() + geom_histogram(aes(x = lat_range_sig$samples$H1, fill="Latitudinal range model"), alpha = 0.5)+
  geom_histogram(aes(x = area_sig$samples$H1,fill="Area polygon model"), alpha = 0.5)+labs(x="Pagel's lambda",y="Count")+theme_classic()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

In consequence, we decided to run a set of models without considering the phylogeny. This let us include more species with good geographic records but that were not include in the phylogenetic reconstruction due to lack of genetic data.

7.1 Reload data

join_dataset<-read_csv("data_total.csv",col_names=T)
join_dataset$species<-gsub(pattern=" ", replacement="_",join_dataset$species)
#keep unique rows
join_dataset<-distinct(join_dataset,species,.keep_all = TRUE)
###Tranform island as binary
join_dataset<-join_dataset %>% mutate(bin_island=ifelse(cat_island=="island"|cat_island=="island_continent",1,0))
#Let's modify the dataset to deal with polytipic
join_dataset$polymorphism[which(join_dataset$polymorphism %in% c("polytipic","possible polytipic","pattern variable")==TRUE)]<-NA

7.2 Filter species with poor geographical information

Arachnids is one of the groups with the poorest geographic information available in public databases. For instance, in our data ~52% of the species has less than 50 geographical records

species_points<-join_dataset %>% drop_na(n_points)
species_geo<-nrow(species_points[species_points$n_points<50,])/nrow(species_points)*100
print(paste0(species_geo,"%"," of the species with geographical information has less than 50 geographical records"))
## [1] "52.0179372197309% of the species with geographical information has less than 50 geographical records"

To account for this, we decided to calculated the mean and its 95% confidence interval (CI) for the number of geographical records available for all the species. We excluded species from the subsequent analyses that fell outside the lower CI.

#Due to high vari
l_points<-na.omit(log(join_dataset$n_points))
##Let's calculate the 95% around the mean
library(boot)
data <- data.frame(xs = l_points)
bo <- boot(data[, "xs", drop = FALSE], statistic=meanfun, R=5000)
mean_ci<-boot.ci(bo, conf=0.95, type="bca")

ggplot(tibble(x=bo$t[,1]), aes(x=x)) +geom_density()+geom_segment(x=mean_ci$bca[4],xend=mean_ci$bca[5],y=0,yend=0,color="blue",size=2,lineend="round")

##Remove species with low number of records
datos_filtered<-join_dataset %>% filter(n_points>=exp(mean_ci$bca[4]))
#datos_filtered<-na.omit(datos_filtered)

##Let's save this dataset for further analyses

data_without_filtering<-datos_filtered

all the predictors seems skewed or not uniform distributed, let’s modify some predictors that may affect the regression due to their non-normal distribution

datos_filtered$T_centroid_lat<-abs(datos_filtered$centroid_lat)   
datos_filtered$T_lat_range<-sqrt(abs(datos_filtered$lat_range))
datos_filtered$T_area_polygon<-sqrt(datos_filtered$area_polygon+1)
datos_filtered$T_lat_range_wsc<-abs(datos_filtered$lat_range_wsc)
datos_filtered$T_area_countries_wsc<-sqrt(datos_filtered$area_countries_wsc)
datos_filtered$T_points<-log(datos_filtered$n_points)
datos_filtered$T_bole<-log(datos_filtered$bole_female)

Correlation plot between the variables

cor_matrix <- cor(na.omit(datos_filtered[,c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon","eco_reg_points","eco_reg_polygon","temp_zones_points","temp_zones_polygon")]))

colnames(cor_matrix)<c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon","eco_reg_points","eco_reg_polygon","temp_zones_points","temp_zones_polygon")
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
rownames(cor_matrix)<-c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon","eco_reg_points","eco_reg_polygon","temp_zones_points","temp_zones_polygon")
par(mfrow=c(1,1))
corrplot(cor_matrix, method = "number", type = "upper", order = "original", tl.cex=1, )

7.3 Analyses

Remove species from islands that can affect calculations due to their geographic limit for dispersion

datos_filtered_total<-datos_filtered %>% filter(cat_island != "island") %>% data.frame()

Standardize variable before the analysis, excluding the count variables

#datos_filtered_total[,c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon")]<-standard_varibles(datos_filtered_total[,c("T_points","T_centroid_lat","T_bole","T_lat_range","T_area_polygon")]) %>% data.frame()

Number of colour monomorphic and polymorphic species

table(datos_filtered_total$polymorphism)
## 
##  no yes 
##  59  41

Let’s run the models!

7.3.1 Association between colour polymorphism and the latitude of the centroid

Evaluate if the colour monomorphic and colour polymorphic species differ in the latitude of the centroid

set.seed(30011994)
brm_centroid <- brm(
  T_centroid_lat ~ polymorphism,
  data = datos_filtered_total,
  family = skew_normal(),
  #prior = prior,
  control = list(adapt_delta = 0.999, max_treedepth=10)
  ,chains = 4, cores = 4, iter = 20000
  ,file="centroids3.rda"
)

pairs(brm_centroid)

summary(brm_centroid)
##  Family: skew_normal 
##   Links: mu = identity; sigma = identity; alpha = identity 
## Formula: T_centroid_lat ~ polymorphism 
##    Data: datos_filtered_total (Number of observations: 99) 
##   Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
##          total post-warmup draws = 40000
## 
## Population-Level Effects: 
##                 Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept          36.68      2.45    31.87    41.49 1.00    29452    25133
## polymorphismyes    -8.88      3.83   -16.41    -1.34 1.00    28441    25674
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma    18.78      1.46    16.21    21.96 1.00    22399    21133
## alpha    -0.26      1.62    -3.76     2.82 1.00    19855    16728
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(brm_centroid)
## # Bayesian R2 with Compatibility Interval
## 
##   Conditional R2: 0.055 (95% CI [3.434e-11, 0.142])
## Check if the predicted values fits the rawdata
pp_check(brm_centroid)
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

##Preliminary plots
plot(conditional_effects(brm_centroid))

They do not have differences in the latitude of the centroid

Evaluate if the colour monomorphic and colour polymorphic species differ in the body length

set.seed(30011994)
brm_bole <- brm(
  T_bole ~ polymorphism,
  data = datos_filtered_total,
  family = gaussian(),
  #prior = prior,
  control = list(adapt_delta = 0.999, max_treedepth=10)
  ,chains = 4, cores = 4, iter = 20000
  ,file="bole3.rda"
)

pairs(brm_bole)

summary(brm_bole)
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: T_bole ~ polymorphism 
##    Data: datos_filtered_total (Number of observations: 99) 
##   Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
##          total post-warmup draws = 40000
## 
## Population-Level Effects: 
##                 Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept           2.12      0.08     1.97     2.28 1.00    31494    24510
## polymorphismyes     0.15      0.13    -0.09     0.40 1.00    31013    24525
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     0.62      0.05     0.54     0.71 1.00    30166    24838
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(brm_bole)
## # Bayesian R2 with Compatibility Interval
## 
##   Conditional R2: 0.016 (95% CI [1.237e-10, 0.078])
## Check if the predicted values fits the rawdata
pp_check(brm_bole)
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

##Preliminary plots
plot(conditional_effects(brm_bole))

They do not have differences in their body length

let’s Evaluate the association fo the predictors

lm_lat_range <- lm(T_lat_range ~ polymorphism+T_bole+T_centroid_lat, data=datos_filtered_total)

check_collinearity(lm_lat_range)

The predictors are not collinear, we can use all of them in the models

7.3.2 Latitudinal range

set.seed(30011994)
brm_latrange_3 <- brm(
 T_lat_range ~ polymorphism+T_bole+T_centroid_lat,
  data = datos_filtered_total,
  family = gaussian(),
  control = list(adapt_delta = 0.999, max_treedepth=10)
  ,chains = 4, cores = 4, iter = 20000
 ,file="latrange3.rda"
)

pairs(brm_latrange_3)

summary(brm_latrange_3)
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: T_lat_range ~ polymorphism + T_bole + T_centroid_lat 
##    Data: datos_filtered_total (Number of observations: 98) 
##   Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
##          total post-warmup draws = 40000
## 
## Population-Level Effects: 
##                 Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept           5.55      0.70     4.19     6.92 1.00    26745    24185
## polymorphismyes     0.33      0.29    -0.23     0.89 1.00    33868    26572
## T_bole              0.27      0.24    -0.21     0.75 1.00    30868    28291
## T_centroid_lat     -0.03      0.01    -0.04    -0.01 1.00    30220    26746
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     1.34      0.10     1.17     1.55 1.00    32104    26514
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(brm_latrange_3)
## # Bayesian R2 with Compatibility Interval
## 
##   Conditional R2: 0.198 (95% CI [0.080, 0.315])
## Check if the predicted values fits the rawdata
pp_check(brm_latrange_3)
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

##Preliminary plots
plot(conditional_effects(brm_latrange_3))

7.3.3 Range size calculated with the area of the Convex polygon

area_polygon_3 <- brm(
 T_area_polygon ~ polymorphism+T_bole+T_centroid_lat,
  data =datos_filtered_total,
  family = skew_normal(),
  control = list(adapt_delta = 0.999, max_treedepth=10)
  ,chains = 4, cores = 4, iter = 20000
 ,file="areapoly3.rda"
)

pairs(area_polygon_3)

summary(area_polygon_3)
##  Family: skew_normal 
##   Links: mu = identity; sigma = identity; alpha = identity 
## Formula: T_area_polygon ~ polymorphism + T_bole + T_centroid_lat 
##    Data: datos_filtered_total (Number of observations: 97) 
##   Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
##          total post-warmup draws = 40000
## 
## Population-Level Effects: 
##                 Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept        1489.07    606.43   274.80  2662.32 1.00    23322    25656
## polymorphismyes   245.96    241.94  -227.62   726.34 1.00    28142    25177
## T_bole            298.34    209.63  -106.13   717.41 1.00    27849    26531
## T_centroid_lat      7.36      7.36    -6.94    21.84 1.00    25571    26897
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma  1262.95    101.77  1081.23  1480.30 1.00    21835    22705
## alpha     3.34      1.25     1.45     6.30 1.00    21364    15085
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(area_polygon_3)
## # Bayesian R2 with Compatibility Interval
## 
##   Conditional R2: 0.045 (95% CI [5.415e-04, 0.119])
## Check if the predicted values fits the rawdata
pp_check(area_polygon_3)
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

##Preliminary plots
plot(conditional_effects(area_polygon_3))

7.3.4 Ecological regions

To indirectly explore the difference in niche width between colour monomorphic and polymorphic species, we measured the number of ecological regions occupy by each species using the geographical records and polygons. the ecological regions were obtained here

BRMS model exploring differences in the ecological regions occupy by monomorphic and polymorphic species based on geographical records

set.seed(30011994)
eco_reg_points_3 <- brm(
 eco_reg_points ~  polymorphism+T_bole+T_centroid_lat,
  data = datos_filtered_total,
  family = negbinomial(),
  #prior = prior,
  control = list(adapt_delta = 0.999, max_treedepth=20)
  ,chains = 4, cores = 4, iter = 20000
 ,file="ecoreg_points3.rda"
)
## Warning: Rows containing NAs were excluded from the model.
## Compiling Stan program...
## Start sampling
pairs(eco_reg_points_3)

summary(eco_reg_points_3)
##  Family: negbinomial 
##   Links: mu = log; shape = identity 
## Formula: eco_reg_points ~ polymorphism + T_bole + T_centroid_lat 
##    Data: datos_filtered_total (Number of observations: 98) 
##   Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
##          total post-warmup draws = 40000
## 
## Population-Level Effects: 
##                 Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept           1.95      0.38     1.20     2.70 1.00    27427    27002
## polymorphismyes     0.25      0.16    -0.06     0.55 1.00    36094    27025
## T_bole              0.60      0.13     0.34     0.86 1.00    31383    28543
## T_centroid_lat     -0.00      0.00    -0.01     0.01 1.00    30568    27635
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape     1.94      0.28     1.44     2.54 1.00    34757    28371
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(eco_reg_points_3)
## # Bayesian R2 with Compatibility Interval
## 
##   Conditional R2: 0.192 (95% CI [0.071, 0.334])
## Check if the predicted values fits the rawdata
pp_check(eco_reg_points_3)
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

##Preliminary plots
plot(conditional_effects(eco_reg_points_3))

BRMS model exploring differences in the ecological regions occupy by monomorphic and polymorphic species based on polygon

eco_reg_polygon_3 <- brm(
 eco_reg_polygon~ polymorphism+T_bole+T_centroid_lat,
  data = datos_filtered_total,
  family = negbinomial(),
  #prior = prior,
  control = list(adapt_delta = 0.999, max_treedepth=20)
  ,chains = 4, cores = 4, iter = 20000
 ,file="ecoreg_pol3.rda"
)
## Warning: Rows containing NAs were excluded from the model.
## Compiling Stan program...
## Start sampling
pairs(eco_reg_polygon_3)

summary(eco_reg_polygon_3)
##  Family: negbinomial 
##   Links: mu = log; shape = identity 
## Formula: eco_reg_polygon ~ polymorphism + T_bole + T_centroid_lat 
##    Data: datos_filtered_total (Number of observations: 97) 
##   Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
##          total post-warmup draws = 40000
## 
## Population-Level Effects: 
##                 Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept           3.44      0.38     2.71     4.18 1.00    29834    26984
## polymorphismyes     0.38      0.17     0.05     0.71 1.00    32037    27029
## T_bole              0.39      0.14     0.11     0.67 1.00    32276    28756
## T_centroid_lat     -0.01      0.00    -0.02    -0.00 1.00    31393    26553
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape     1.71      0.24     1.27     2.23 1.00    32882    28190
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(eco_reg_polygon_3)
## # Bayesian R2 with Compatibility Interval
## 
##   Conditional R2: 0.194 (95% CI [0.071, 0.337])
## Check if the predicted values fits the rawdata
pp_check(eco_reg_polygon_3)
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

##Preliminary plots
plot(conditional_effects(eco_reg_polygon_3))

7.3.5 Climatic zones

Addtionally, we also explored if colour monomorphic and polymorphic species differ in the number of climatic zones they occupy. We measured the number of climatic zones for each species using the geographical records and polygons. the Köppen-Geiger climate classification zones were obtained here

temp_zones_points_3 <- brm(
 temp_zones_points~ polymorphism+T_bole+T_centroid_lat ,
  data = datos_filtered_total,
  family = negbinomial(),
  #prior = prior,
  control = list(adapt_delta = 0.999, max_treedepth=20)
  ,chains = 4, cores = 4, iter = 10000
 ,file="temp_points3.rda"
)

pairs(temp_zones_points_3)

summary(temp_zones_points_3)
##  Family: negbinomial 
##   Links: mu = log; shape = identity 
## Formula: temp_zones_points ~ polymorphism + T_bole + T_centroid_lat 
##    Data: datos_filtered_total (Number of observations: 98) 
##   Draws: 4 chains, each with iter = 10000; warmup = 5000; thin = 1;
##          total post-warmup draws = 20000
## 
## Population-Level Effects: 
##                 Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept           1.21      0.21     0.79     1.62 1.00    11682    11514
## polymorphismyes     0.14      0.09    -0.03     0.30 1.00    14716    13505
## T_bole              0.30      0.07     0.16     0.44 1.00    12891    12256
## T_centroid_lat      0.01      0.00     0.00     0.01 1.00    16263    13986
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape    33.93     32.20     9.94   119.91 1.00    11751     9110
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(temp_zones_points_3)
## # Bayesian R2 with Compatibility Interval
## 
##   Conditional R2: 0.199 (95% CI [0.073, 0.333])
## Check if the predicted values fits the rawdata
pp_check(temp_zones_points_3)
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

##Preliminary plots
plot(conditional_effects(temp_zones_points_3))

temp_zones_polygon_3 <- brm(
 temp_zones_polygon~polymorphism+T_bole+T_centroid_lat ,
  data = datos_filtered_total,
  family = negbinomial(),
  #prior = prior,
  control = list(adapt_delta = 0.999, max_treedepth=20)
  ,chains = 4, cores = 4, iter = 10000
 ,file="temp_pol3.rda"
)

pairs(temp_zones_polygon_3)

summary(temp_zones_polygon_3)
##  Family: negbinomial 
##   Links: mu = log; shape = identity 
## Formula: temp_zones_polygon ~ polymorphism + T_bole + T_centroid_lat 
##    Data: datos_filtered_total (Number of observations: 97) 
##   Draws: 4 chains, each with iter = 10000; warmup = 5000; thin = 1;
##          total post-warmup draws = 20000
## 
## Population-Level Effects: 
##                 Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept           2.76      0.23     2.31     3.21 1.00    13969    11694
## polymorphismyes    -0.06      0.10    -0.25     0.12 1.00    16576    14449
## T_bole              0.07      0.08    -0.09     0.23 1.00    15339    12730
## T_centroid_lat     -0.01      0.00    -0.01    -0.00 1.00    16768    13725
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape     8.16      2.22     4.83    13.45 1.00    17419    12693
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(temp_zones_polygon_3)
## # Bayesian R2 with Compatibility Interval
## 
##   Conditional R2: 0.189 (95% CI [0.060, 0.320])
## Check if the predicted values fits the rawdata
pp_check(temp_zones_polygon_3)
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

##Preliminary plots
plot(conditional_effects(temp_zones_polygon_3))

7.3.6 Summary of the models

all_models<-NULL

for(mod in c("brm_latrange_3","area_polygon_3","temp_zones_polygon_3","temp_zones_points_3")){
baye_mode = get(mod)
  bayes_results<-baye_mode %>% 
  spread_draws(b_polymorphismyes)
  bayes_results<-tibble(b_polymorphismyes=bayes_results$b_polymorphismyes,model=mod)
  all_models<-bind_rows(all_models, bayes_results)
} 

all_models %>% ggplot(aes(y = model, x = b_polymorphismyes)) +
  stat_halfeye()+
  theme_classic()+
  geom_vline(xintercept = 0, linetype = "dashed",col="black",size=1)+
  labs(x="Estimate",y="Models")

8 Island models

Considering that the presence on islands can be an indicator of range expansion, we also recorded whether species were distributed on islands by overlapping both sources of geographical distribution (occurrences and WSC) with the global shoreline vector from the islands database (Sayre et al., 2019).

We decided to test the association between colour polymorphism and the presence on islands using two datasets. The first one was using the species occurrences after discarding those species with poor geographic records as done here

##Remove species with low number of records
data_filtered_phylogeny<-data_filtered_phylogeny %>% drop_na(polymorphism) %>% drop_na(bin_island)
table(data_filtered_phylogeny$polymorphism)
## 
##  no yes 
##  60  35

This dataset includes 56 colour monomorphic species and 35 colour polymorphic species.


To increase the number of species in our analysis, we created a second dataset without filtering species based on the number of geographic records. We determined the presence on islands using the geographic distribution information available on the World Spider Catalog.

dataset_all_species_phylogeny<-dataset_all_species_phylogeny %>% drop_na(n_points) %>%drop_na(polymorphism) %>% drop_na(bin_island) %>% drop_na(cat_island_points)

table(dataset_all_species_phylogeny$polymorphism)
## 
##  no yes 
## 131  61

This second dataset includes 125 colour monomorphic species and 61 colour polymorphic species.

8.1 Presence on island using filtered dataset

##  Family: bernoulli 
##   Links: mu = logit 
## Formula: bin_island ~ 1 + polymorphism + bole_female + (1 | gr(species, cov = A)) 
##    Data: data_filtered_phylogeny (Number of observations: 94) 
##   Draws: 4 chains, each with iter = 10000; warmup = 5000; thin = 1;
##          total post-warmup draws = 20000
## 
## Group-Level Effects: 
## ~species (Number of levels: 94) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     4.12      3.84     0.46    13.24 1.00     2525     3254
## 
## Population-Level Effects: 
##                 Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept           2.74      2.88    -1.75     8.67 1.00     7705     4824
## polymorphismyes     4.65      3.37     0.94    13.14 1.00     4488     2999
## bole_female         0.07      0.16    -0.15     0.43 1.00     6068     3615
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
## # Bayesian R2 with Compatibility Interval
## 
##   Conditional R2: 0.481 (95% CI [0.076, 0.872])
##      Marginal R2: 0.011 (95% CI [1.128e-24, 0.217])

With this dataset there seems to be an association between colour polymorphism and the presence on islands

## .
##         0         1 
## 0.1864407 0.8135593
## .
##          0          1 
## 0.02857143 0.97142857
## standardGeneric for "plot" defined from package "base"
## 
## function (x, y, ...) 
## standardGeneric("plot")
## <environment: 0x0000000026a4aad8>
## Methods may be defined for arguments: x, y
## Use  showMethods(plot)  for currently available ones.

8.2 Presence on island using most of the species in phylogeny

tree=check_and_fix_ultrametric(join_tree)

missing<-tree$tip.label[which(tree$tip.label %in% dataset_all_species_phylogeny$species==FALSE)]

island_dataset_tree<-drop.tip(tree,missing)

island_dataset_tree$edge.length <- island_dataset_tree$edge.length / (max(island_dataset_tree$edge.length))

A <- ape::vcv(island_dataset_tree,corr = TRUE)

set.seed(30011994)

dataset_all_species_phylogeny$log_bole_female<-log(dataset_all_species_phylogeny$bole_female)

dataset_all_species_phylogeny<-dataset_all_species_phylogeny[!is.na(dataset_all_species_phylogeny$log_bole_female),]

brm_island_2 <- brm(
  bin_island ~ 1 + polymorphism + bole_female + (1| gr(species, cov = A))  ,
  data = dataset_all_species_phylogeny,
  family = bernoulli(),
  #prior = prior,
  data2 = list(A = A),
  control = list(adapt_delta = 0.999, max_treedepth=20)
  ,chains = 4, cores = 4, iter = 20000
  ,file = "island2.rda"
)
pairs(brm_island_2)

summary(brm_island_2)
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: bin_island ~ 1 + polymorphism + bole_female + (1 | gr(species, cov = A)) 
##    Data: dataset_all_species_phylogeny (Number of observations: 190) 
##   Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
##          total post-warmup draws = 40000
## 
## Group-Level Effects: 
## ~species (Number of levels: 190) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)    10.79     22.52     1.67    58.54 1.00     2294     2437
## 
## Population-Level Effects: 
##                 Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept          -3.69      9.87   -25.50     1.94 1.00     4126     2473
## polymorphismyes     3.39      6.78     0.17    16.54 1.00     4427     2685
## bole_female         0.46      0.87     0.05     2.33 1.00     3341     2533
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(brm_island_2)
## # Bayesian R2 with Compatibility Interval
## 
##   Conditional R2: 0.669 (95% CI [0.399, 1.000])
##      Marginal R2: 0.214 (95% CI [2.796e-13, 0.412])
## Check if the predicted values fits the rawdata
pp_check(brm_island_2)
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

##Preliminary plots
#plot(conditional_effects)

With this dataset there seems to be an association between colour polymorphism and the presence on islands

## .
##         0         1 
## 0.4573643 0.5426357
## .
##         0         1 
## 0.1803279 0.8196721

A similar pattern is obtained when with run the model without the phylogeny

set.seed(30011994)

brm_island_3 <- brm(
  bin_island ~ 1 + polymorphism + bole_female,
  data = dataset_all_species_phylogeny,
  family = bernoulli(),
  #prior = prior,
  #data2 = list(A = A),
  control = list(adapt_delta = 0.999, max_treedepth=10)
  ,chains = 4, cores = 4, iter = 20000
)
## Compiling Stan program...
## Start sampling
pairs(brm_island_3)

summary(brm_island_3)
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: bin_island ~ 1 + polymorphism + bole_female 
##    Data: dataset_all_species_phylogeny (Number of observations: 190) 
##   Draws: 4 chains, each with iter = 20000; warmup = 10000; thin = 1;
##          total post-warmup draws = 40000
## 
## Population-Level Effects: 
##                 Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept          -0.72      0.31    -1.35    -0.12 1.00    29369    24091
## polymorphismyes     1.23      0.39     0.47     2.02 1.00    25518    22547
## bole_female         0.11      0.03     0.05     0.18 1.00    23714    21102
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#r2
r2_bayes(brm_island_3)
## # Bayesian R2 with Compatibility Interval
## 
##   Conditional R2: 0.139 (95% CI [0.068, 0.215])
## Check if the predicted values fits the rawdata
pp_check(brm_island_3)
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

9 Conclusion

Lets observe how polymorphic and monomorphic species differ according to all the predictor. To build this graph we will use the linear models with the total data set and the Island model with the highest number of species.

all_models<-NULL

for(mod in c("brm_latrange_1","area_polygon_1","eco_reg_points_1","eco_reg_polygon_1","temp_zones_polygon_1","temp_zones_points_1","brm_island_2")){
baye_mode = get(mod)
  bayes_results<-baye_mode %>% 
  spread_draws(b_polymorphismyes)
  bayes_results<-tibble(b_polymorphismyes=bayes_results$b_polymorphismyes,model=mod)
  all_models<-bind_rows(all_models, bayes_results)
} 

all_models$model<-factor(all_models$model,levels=c("brm_island_2","temp_zones_polygon_1","temp_zones_points_1","eco_reg_polygon_1","eco_reg_points_1","area_polygon_1","brm_latrange_1"))

na.omit(all_models) %>% ggplot(aes(y = model, x = b_polymorphismyes, fill = after_stat(x < 0))) +
  stat_halfeye()+
  theme_classic()+
  geom_vline(xintercept = 0, linetype = "dashed",col="black",size=0.8)+
  labs(x="Estimate",y="Models")+
  xlim(-0.5,5)+
  scale_fill_manual(values = c("#BCCAEF","#D66D79" ))
## Warning: Removed 45036 rows containing missing values or values outside the scale range
## (`stat_slabinterval()`).

From all the variables and data filtering explored, we can conclude that monomorphic and polymorphic spider species in our dataset only differ in their presence on islands